


*** Initial information and settings

clear all
macro drop _all
program drop _all
eststo clear
version 14.2
set matsize 10000


* Set directory: set the main project root directory here,
* /Data/ folder is a first-level subdirectory
global maindir "/home/someuser/somefolder/mainprojectdir"
* Any directory to direct output to
global outputdir "/home/someuser/somefolder/output"

* Load the data
use "$maindir/Data/peerfxdata.dta" , clear

* Generate control vectors
global control   "female native i.age"
global clcontrol "clmeanfemale clmeannative clmeanage"
global slcontrol "slmeanfemale slmeannative slmeanage"

* adjust doesn't work with omitted vars, dummies or similar, anything with a prefix
tab schoolxtrackxyear, gen(sty_)
tab schoolxtrack, gen(st_)

* simple global cubic polynomial specification
generate shsnchildrensq = shsnchildren^2
generate shsnchildrencu = shsnchildren^3
generate shsnchildrenxsnchild   = shsnchildren   * snchild
generate shsnchildrensqxsnchild = shsnchildrensq * snchild
generate shsnchildrencuxsnchild = shsnchildrencu * snchild

reghdfe std_composite snchild shsnchildren shsnchildrensq shsnchildrencu shsnchildrenxsnchild shsnchildrensqxsnchild shsnchildrencuxsnchild ///
        clsizefix $control $clcontrol , absorb(schoolxtrackxyear, savefe) cl(classfix)

lincom shsnchildren + shsnchildrenxsnchild
lincom shsnchildrensq + shsnchildrensqxsnchild
lincom shsnchildrencu + shsnchildrencuxsnchild

*** parameters (betas): global polynomial
global alpha0 =   0
global alpha1 =  -0.0843493
global beta0  =  -0.1179753
global beta1  = (-0.1179753 + 0.3170233)
global gamma0 =  -0.0874245
global gamma1 = (-0.0874245 - 1.472442)
global delta0 =  -0.7979071
global delta1 = (-0.7979071 + 1.174244)


*** Fitted plots on artificial full support share

preserve

clear
set obs 100
g x=_n/100
g x2=x^2
g x3=x^3

*** parameters (betas): global polynomial
global alpha0 =   0
global alpha1 =  -0.0843493
global beta0  =  -0.1179753
global beta1  = (-0.1179753 + 0.3170233)
global gamma0 =  -0.0874245
global gamma1 = (-0.0874245 - 1.472442)
global delta0 =  -0.7979071
global delta1 = (-0.7979071 + 1.174244)

* Outcome function for SN 0
generate y0 = $alpha0 + $beta0*x + $gamma0*x2 + $delta0*x3

* Outcome function for SN 1
generate y1 = $alpha1 + $beta1*x + $gamma1*x2 + $delta1*x3

* Aggregated outcome function
generate y = x*y1 + (1-x)*y0
sort x

* Numerical derivatives
generate d1 = (y-y[_n-1])*100
generate d2 = (d1-d1[_n-1])*100

* Analytical derivatives
generate d1_alt = $alpha1 + 2*$beta1*x + 3*$gamma1*x2 + 4*$delta1*x3 + $beta0 + 2*$gamma0*x + 3*$delta0*x2 - $alpha0 - 2*$beta0*x - 3*$gamma0*x2 - 4*$delta0*x3
generate d2_alt = 2*$beta1 + 6*$gamma1*x + 12*$delta1*x2 + 2*$gamma0 + 6*$delta0*x - 2*$beta0 - 6*$gamma0*x - 12*$delta0*x2


*** Plots

*** aggregated outcome function
twoway (line y0 y1 y x, sort lc(black black black) lp(shortdash dash solid)), ///
  xline(0.254) xtitle("Proportion of peers with SN") ytitle("Residual test score") ///
  legend(order(1 "Non-SN students" 2 "SN students" 3 "Overall function") col(3) size(*0.85) symxsize(5) region(color(white))) ///
  title("A. Outcome functions", size(*0.9) color(black)) ///
  graphregion(color(white)) ylab(, nogrid) name(graphy, replace)

*** first derivative
twoway line d1_alt x, sort lc(black) yline(0, lpattern(dash) lcolor(black)) ///
  xtitle("Proportion of peers with SN") ytitle("First derivative") ///
  title("B. First derivative", size(*0.9) color(black)) ///
  graphregion(color(white)) ylab(, nogrid) name(graphd1, replace)

*** second derivative
twoway (line d2_alt x , lc(black)) (hist , disc) ///
  yline(0, lpattern(dash) lcolor(black)) ///
  xtitle("Proportion of peers with SN") ytitle("Second derivative") ///
  title("C. Second derivative", size(*0.9) color(black)) ///
  graphregion(color(white)) ylab(, nogrid) name(graphd2, replace)

*** Combined graph for paper
grc1leg graphy graphd1 graphd2, col(1) ysize(11) ///
  graphregion(color(white))
graph display, ysize(11) name(graphcbd, replace)

graph export "$outputdir/figures/reallocation.pdf" , replace

restore


****************** QUANTIFY GAINS WITHIN SCHOOL-TRACK-YEAR ****************

*1) status quo prediction

g y0_statusquo = $alpha0 + $beta0*shsnchildren + $gamma0*shsnchildrensq + $delta0*shsnchildrencu
g y1_statusquo = $alpha1 + $beta1*shsnchildren + $gamma1*shsnchildrensq + $delta1*shsnchildrencu
g y_statusquo  = shsnchildren*y1_statusquo + (1-shsnchildren)*y0_statusquo


*2) shares of sn within cell with equal distribution

bysort schoolxtrackxyear: egen meansn = mean(shsnchildren)
g meansnsq = meansn^2
g meansncu = meansn^3


*3) prediction with equal shares

g y0_equal = $alpha0 + $beta0*meansn + $gamma0*meansnsq + $delta0*meansncu
g y1_equal = $alpha1 + $beta1*meansn + $gamma1*meansnsq + $delta1*meansncu
g y_equal  = meansn*y1_equal + (1-meansn)*y0_equal


*4) average gain per student

g gain   = y_equal  - y_statusquo
g gain_0 = y0_equal - y0_statusquo
g gain_1 = y1_equal - y1_statusquo

sum gain gain_0 gain_1
sum shsnchildren meansn

* Gain by cell (school-track-year)

bys schoolxtrackxyear: egen gainbycell = mean(gain)

* Get the x-axis

xtset classfix
forvalues j = 1(1)1398 {
  qui xtsum shsnchildren if schoolxtrackxyear==`j'
  qui gen btwsd_`j' = `r(sd_b)' if schoolxtrackxyear==`j'
}
egen btwsd = rowtotal(btwsd_*), missing
drop btwsd_*
gen btwvar = btwsd^2

sum btwsd btwvar

preserve
duplicates drop schoolxtrackxyear , force
twoway (scatter gainbycell btwsd, sort msymbol(circle_hollow) mcolor(black) mlwidth(thin)) ///
        , ///
        xtitle("Variation between classes within a school-track-year", size(*.9)) ///
        ytitle("Estimated gain in test scores", size(*.9)) ///
        graphregion(color(white)) ylab(, nogrid)
graph export "$outputdir/figures/gainbycell.pdf" , replace
restore
